import requests
from bs4 import BeautifulSoup
from time import sleep
import xlsxwriter, time
def getHtml(url, m):
    """Scrape used-car listings from che168.com and write them to an xlsx file.

    Prompts interactively for the number of index pages to crawl, then walks
    the pagination links, writing one worksheet row per listing.

    Args:
        url: URL of the first listing index page.
        m: First 0-based worksheet row to write data into (row 0 is the header).
    """
    start_time = time.time()
    html = requests.get(url, timeout=10)  # timeout so a dead server can't hang us
    wb = xlsxwriter.Workbook('汽车之家.xlsx', {'constant_memory': True})
    ws = wb.add_worksheet('二手车信息')
    head = ['车辆型号', '关于', '现价', '原价', '链接']
    ws.write_row(0, 0, head)
    page = int(input('请输入爬取的页数:'))
    base = 'https://www.che168.com/'
    for p in range(1, page + 1):
        soup = BeautifulSoup(html.text, 'html.parser')
        lists = soup.select('a.carinfo')
        print('共%s页,正在爬取第%s页......' % (page, p))
        # NOTE(review): the original code deliberately skipped the last
        # a.carinfo entry on each page; that behavior is preserved here.
        for k, item in enumerate(lists[:-1], start=1):
            link = base + item.get('href')
            print('共%s条,正在爬取第%s条......url:%s' % (len(lists), k, link))
            name = item.select_one('h4').text
            car = item.select_one('p').text
            price = item.select_one('span').text
            value = item.select_one('s').text
            # Build the row list directly: joining the fields with ',' and
            # re-splitting (as before) corrupts fields that contain commas,
            # and the row only needs to be written once, not len(row) times.
            ws.write_row(m, 0, [name, car, price, value, link])
            m = m + 1
        next_link = soup.select_one('a.page-item-next')
        if next_link is None:
            break  # no further pages; previously this raised AttributeError
        html = requests.get(base + next_link.get('href'), timeout=10)
        sleep(2)  # be polite to the server between page fetches
    wb.close()
    end_time = time.time()
    print('本次爬取共耗时%s秒' % (end_time - start_time))


if __name__ == "__main__":
    # Data rows begin at worksheet row 1; row 0 holds the column headers.
    start_row = 1
    start_url = 'https://www.che168.com/haerbin/list/#pvareaid=100945'
    getHtml(start_url, start_row)
